####################################################################################
## “Do Provinces that Win the Spanish Christmas Lottery Experience a Surge in Incumbent 
## Popularity? An Out-of-Sample Replication of Bagues and Esteve-Volart (2016)”, 
## by Carolina Bernal, Donald P. Green, and David Vilalta
## What for: Creates Figure 3 in the paper
####################################################################################

library(fixest)
library(ggplot2)
library(dplyr)
library(car)
library(haven)

# =========================
# Paths (edit only BASE_DIR)
# =========================
BASE_DIR <- "~/NEW AND FINAL REPPKG"

DATA_FILE <- file.path(
  BASE_DIR, "Data", "Our_data",
  "20250531_SpanishLottery_Complete_province.dta"
)

FIG_DIR <- file.path(BASE_DIR, "Results", "Figures_Electoral")
dir.create(FIG_DIR, recursive = TRUE, showWarnings = FALSE)

# Load the dataset
data <- read_dta(DATA_FILE)

# Helper: quietly close a plotting device if one is open
close_device_if_open <- function() {
  if (dev.cur() > 1) try(dev.off(), silent = TRUE)
}

################################################################################
# Replication period  (Figure 3a)
################################################################################

complete_data <- data %>%
  mutate(D_housing_price_2 = if_else(is.na(D_housing_price_2), housing_price_growth_term_1, D_housing_price_2)) %>%
  filter(year < 2009) %>%
  select(province, year, var_votes_inc_ours, top_prizes_gdp_term_2, expenditure_gdp_term_2, 
         D_unemployment_rate_2, D_gdp_pc_2, D_housing_price_2, D_cpi_2, province_num, population) %>% 
  na.omit()

full_model <- lm(
  var_votes_inc_ours ~ top_prizes_gdp_term_2 + 
    expenditure_gdp_term_2 * factor(year) + 
    D_unemployment_rate_2 + 
    D_gdp_pc_2 + 
    D_housing_price_2 + 
    D_cpi_2 + 
    factor(year) + 
    factor(province_num),
  data = complete_data,
  weights = population
)

avPlot <- avPlots(full_model, terms = "top_prizes_gdp_term_2", id.n = 0, pch = 19, cex = 0.5, col = "blue", plot = FALSE)
avPlots(full_model, terms = "top_prizes_gdp_term_2", id.n = 4, pch = 19, cex = 0.5, col = "blue")
close_device_if_open()

plot_data <- as.data.frame(avPlot[[1]])
plot_data$population <- complete_data$population
plot_data$row_number <- rownames(plot_data)

p_rep <- ggplot(plot_data, aes(x = top_prizes_gdp_term_2, y = var_votes_inc_ours, size = population)) +
  geom_point(alpha = 0.5, color = "brown") +
  geom_smooth(method = "lm", se = TRUE, fill = "blue", alpha = 0.2) +
  labs(x = "Prizes as % of GDP | X",
       y = "Delta Incumbent Votes | X",
       title = "",
       size = "Population") +
  theme_minimal() +
  theme(legend.position = "none")

ggsave(filename = file.path(FIG_DIR, "figure3a.png"), plot = p_rep, width = 6.5, height = 4.5, dpi = 300)

################################################################################
# Out-of-sample period  (Figure 3b)
################################################################################

complete_data <- data %>%
  mutate(D_housing_price_2 = if_else(is.na(D_housing_price_2), housing_price_growth_term_1, D_housing_price_2)) %>%
  filter(year > 2008) %>%
  select(province, year, var_votes_inc_ours, top_prizes_gdp_term_2, expenditure_gdp_term_2, 
         D_unemployment_rate_2, D_gdp_pc_2, D_housing_price_2, D_cpi_2, province_num, population) %>% 
  na.omit()

full_model <- lm(
  var_votes_inc_ours ~ top_prizes_gdp_term_2 + 
    expenditure_gdp_term_2 * factor(year) + 
    D_unemployment_rate_2 + 
    D_gdp_pc_2 + 
    D_housing_price_2 + 
    D_cpi_2 + 
    factor(year) + 
    factor(province_num),
  data = complete_data,
  weights = population
)

avPlot <- avPlots(full_model, terms = "top_prizes_gdp_term_2", id.n = 0, pch = 19, cex = 0.5, col = "blue", plot = FALSE)
avPlots(full_model, terms = "top_prizes_gdp_term_2", id.n = 4, pch = 19, cex = 0.5, col = "blue")
close_device_if_open()

plot_data <- as.data.frame(avPlot[[1]])
plot_data$population <- complete_data$population
plot_data$row_number <- rownames(plot_data)

p_oos <- ggplot(plot_data, aes(x = top_prizes_gdp_term_2, y = var_votes_inc_ours, size = population)) +
  geom_point(alpha = 0.5, color = "brown") +
  geom_smooth(method = "lm", se = TRUE, fill = "blue", alpha = 0.2) +
  labs(x = "Prizes as % of GDP | X",
       y = "Delta Incumbent Votes | X",
       title = "",
       size = "Population") +
  theme_minimal() +
  theme(legend.position = "none")

ggsave(filename = file.path(FIG_DIR, "figure3b.png"), plot = p_oos, width = 6.5, height = 4.5, dpi = 300)

################################################################################
# Pooled  (Figure 3c)
################################################################################

complete_data <- data %>%
  mutate(D_housing_price_2 = if_else(is.na(D_housing_price_2), housing_price_growth_term_1, D_housing_price_2)) %>%
  select(province, year, var_votes_inc_ours, top_prizes_gdp_term_2, expenditure_gdp_term_2, 
         D_unemployment_rate_2, D_gdp_pc_2, D_housing_price_2, D_cpi_2, province_num, population) %>% 
  na.omit()

full_model <- lm(
  var_votes_inc_ours ~ top_prizes_gdp_term_2 + 
    expenditure_gdp_term_2 * factor(year) + 
    D_unemployment_rate_2 + 
    D_gdp_pc_2 + 
    D_housing_price_2 + 
    D_cpi_2 + 
    factor(year) + 
    factor(province_num),
  data = complete_data,
  weights = population
)

avPlot <- avPlots(full_model, terms = "top_prizes_gdp_term_2", id.n = 0, pch = 19, cex = 0.5, col = "blue", plot = FALSE)
avPlots(full_model, terms = "top_prizes_gdp_term_2", id.n = 4, pch = 19, cex = 0.5, col = "blue")
close_device_if_open()

plot_data <- as.data.frame(avPlot[[1]])
plot_data$population <- complete_data$population
plot_data$row_number <- rownames(plot_data)

p_pool <- ggplot(plot_data, aes(x = top_prizes_gdp_term_2, y = var_votes_inc_ours, size = population)) +
  geom_point(alpha = 0.5, color = "brown") +
  geom_smooth(method = "lm", se = TRUE, fill = "blue", alpha = 0.2) +
  labs(x = "Prizes as % of GDP | X",
       y = "Delta Incumbent Votes | X",
       title = "",
       size = "Population") +
  theme_minimal() +
  theme(legend.position = "none")

ggsave(filename = file.path(FIG_DIR, "figure3c.png"), plot = p_pool, width = 6.5, height = 4.5, dpi = 300)


################################################################################
# ONE COMPOSITE FIGURE (Panels A–C)
################################################################################

library(patchwork)

to_bw <- function(p) {
  p +
    geom_smooth(method = "lm", se = FALSE, color = "black", linewidth = 0.6) +
    theme_minimal() +
    theme(legend.position = "none")
}
p_rep_bw  <- to_bw(p_rep)  + ggtitle("Panel A: Replication Period (1989–2008)")
p_oos_bw  <- to_bw(p_oos)  + ggtitle("Panel B: Out-of-Sample Period (2011–2023)")
p_pool_bw <- to_bw(p_pool) + ggtitle("Panel C: Pooled Period (1989–2023)")

title_theme <- theme(plot.title = element_text(face = "plain", size = 11, hjust = 0))

combined_fig_wide <- (p_rep_bw | p_oos_bw | p_pool_bw) & title_theme

ggsave(
  file.path(FIG_DIR, "fig1wide.eps"),
  plot   = combined_fig_wide,
  device = cairo_ps,          # EPS vector output
  width  = 11.5,
  height = 4.2,
  units  = "in",
  fallback_resolution = 600,  # ensures smooth rendering of points
  bg = "white"                # white background instead of transparent
)

ggsave(
  file.path(FIG_DIR, "fig1wide.pdf"),
  plot   = combined_fig_wide,
  device = cairo_pdf,     # vector PDF output
  width  = 11.5,
  height = 4.2,
  units  = "in",
  dpi    = 600,
  bg     = "white"
)

